library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(stringr)
library(dplyr)
library(jsonlite)
library(ggmap)
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten

# R Markdown ----

# Load the complete events table, then carve out one subset per city.
events <- read.csv("D:\\meetups\\events.csv")

nyc_events <- events %>% filter(venue_city == "New York")
sf_events <- events %>% filter(venue_city == "San Francisco")
chi_events <- events %>% filter(venue_city == "Chicago")

# Persist each per-city subset as its own CSV file.
# NOTE(review): write.csv() keeps row names by default, so each file gains a
# leading index column that shows up as an extra column when read back;
# consider row.names = FALSE if no consumer relies on it.
write.csv(x = nyc_events, file = "D:\\meetups\\nycevents.csv")
write.csv(x = sf_events, file = "D:\\meetups\\sfevents.csv")
write.csv(x = chi_events, file = "D:\\meetups\\chievents.csv")

# Stream the venues JSON file into R, un-nest it into a plain data frame,
# and export the result as CSV.
# `flatten` resolves to jsonlite's version here: jsonlite is attached after
# the tidyverse and masks purrr::flatten.
# NOTE(review): this chunk reads from "D:\\meetup\\" while the events chunk
# uses "D:\\meetups\\" -- confirm both directories are intended.
venues <- as.data.frame(
  flatten(
    stream_in(file("D:\\meetup\\venues.json"))
  )
)

write.csv(x = venues, file = "D:\\meetup\\tvenues.csv")


# Stream the groups JSON file (10000 records per page), flatten nested
# data frames, and export the result as CSV.
# `flatten` resolves to jsonlite's version (it masks purrr::flatten).
groups <- stream_in(file("D:\\meetup\\groups.json"), pagesize = 10000)
groups <- as.data.frame(flatten(groups))

# Build one comma-separated topic string PER ROW.
# Calling toString(topics) directly inside mutate() collapses the entire
# column into a single string and recycles that same string onto every row;
# vapply() applies toString() to each row's entry instead and guarantees a
# length-one character result for each.
# NOTE(review): presumably `topics` is a list column with one entry per
# group -- confirm the per-entry structure renders as desired.
groups <- mutate(groups, topic = vapply(topics, toString, character(1)))

# Drop the original `topics` column and keep the string version for the CSV.
groups <- select(groups, -topics)
write.csv(groups, "D:\\meetup\\tgroups.csv")
# Reload the New York and Chicago extracts from their CSV files.
nyc_events <- read.csv(file = "D:\\meetups\\nycevents.csv")

chi_events <- read.csv(file = "D:\\meetups\\chievents.csv")
# Chicago: trim the events table to the mapping columns, then aggregate.
# Wider column selection kept for reference (currently disabled):
#chi_ev_red<-select(chi_events,event_id,category_name,category_name2,category_id,createdtime,description,group_lat,group_lon,group_id,how_to_find_us,event_name,event_time,updated,utc_offset,venue_address_1,venue_address_2,venue_id,venue_lat,venue_lon,venue_name,yes_rsvp_count)

chi_mapping <- chi_events %>%
  select(
    event_id, category_name, venue_name,
    venue_lat, venue_lon, yes_rsvp_count
  )

head(chi_mapping)

# Total number of NA cells across every selected column.
sum(is.na(chi_mapping))
## [1] 2

# One row per coordinate pair: number of events held there (hap_times),
# total RSVPs (part_size) and RSVPs per event (avg_part_size), ordered by
# total RSVPs, largest first.
bin_cevents <- chi_mapping %>%
  group_by(venue_lon, venue_lat) %>%
  summarise(
    hap_times = n(),
    part_size = sum(yes_rsvp_count),
    avg_part_size = part_size / hap_times
  ) %>%
  arrange(desc(part_size)) %>%
  ungroup()

head(bin_cevents)

# Same three measures, split further by category and venue name.
bin_cate <- chi_mapping %>%
  group_by(category_name, venue_name, venue_lon, venue_lat) %>%
  summarise(
    hap_times = n(),
    part_size = sum(yes_rsvp_count),
    avg_part_size = part_size / hap_times
  ) %>%
  arrange(desc(part_size)) %>%
  ungroup()

head(bin_cate)
# New York: trim the events table to the mapping columns, then aggregate.
nyc_mapping <- nyc_events %>%
  select(
    event_id, category_name, venue_name,
    venue_lat, venue_lon, yes_rsvp_count
  )

# Total number of NA cells across every selected column.
sum(is.na(nyc_mapping))
## [1] 208

# One row per coordinate pair: number of events held there (hap_times),
# total RSVPs (part_size) and RSVPs per event (avg_part_size), ordered by
# total RSVPs, largest first.
bin_nevents <- nyc_mapping %>%
  group_by(venue_lon, venue_lat) %>%
  summarise(
    hap_times = n(),
    part_size = sum(yes_rsvp_count),
    avg_part_size = part_size / hap_times
  ) %>%
  arrange(desc(part_size)) %>%
  ungroup()

head(bin_nevents)

# Same three measures, split further by category and venue name.
bin_ncate <- nyc_mapping %>%
  group_by(category_name, venue_name, venue_lon, venue_lat) %>%
  summarise(
    hap_times = n(),
    part_size = sum(yes_rsvp_count),
    avg_part_size = part_size / hap_times
  ) %>%
  arrange(desc(part_size)) %>%
  ungroup()

head(bin_ncate)
## Google's Terms of Service: https://cloud.google.com/maps-platform/terms/.
## Please cite ggmap if you use it! See citation("ggmap") for details.

# Graphs and Analysis ----

# Whole Chicago area: fetch a Google terrain basemap at zoom 11 and wrap it
# as a ggmap plot object that the layers below are added to.
chimap <- ggmap(
  get_map(
    location = "chicago",
    source = "google",
    maptype = "terrain",
    zoom = 11
  )
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=chicago&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&key=<REDACTED>
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=chicago&key=<REDACTED>

# Per-location view: colour encodes how many events were held at a
# coordinate pair, point size encodes the RSVPs per event.
chimap +
  geom_point(
    mapping = aes(
      x = venue_lon, y = venue_lat,
      size = avg_part_size, color = hap_times
    ),
    data = bin_cevents,
    alpha = 0.4
  ) +
  scale_colour_gradient(
    high = "blue", low = "pink",
    name = "# The number of events"
  ) +
  scale_size(
    range = c(2, 10),
    name = "# size of average participation each time"
  ) +
  labs(title = "Where do Chicago people participate in-person events?")

# Per-category view: colour encodes the event category, point size encodes
# the total RSVPs.
chimap +
  geom_point(
    mapping = aes(
      x = venue_lon, y = venue_lat,
      size = part_size, color = category_name
    ),
    data = bin_cate,
    alpha = 0.2
  ) +
  scale_size(range = c(2, 9), name = "# size of total participation") +
  labs(title = "Size of total participation of events in different categories")

# New York: fetch a Google terrain basemap at zoom 12 and wrap it as a
# ggmap plot object that the layers below are added to.
nymap <- ggmap(
  get_map(
    location = "new york",
    source = "google",
    maptype = "terrain",
    zoom = 12
  )
)

# Per-location view: colour encodes how many events were held at a
# coordinate pair, point size encodes the total RSVPs.
nymap +
  geom_point(
    mapping = aes(
      x = venue_lon, y = venue_lat,
      size = part_size, color = hap_times
    ),
    data = bin_nevents,
    alpha = 0.2
  ) +
  scale_colour_gradient(
    high = "blue", low = "orange",
    name = "# The number of events"
  ) +
  scale_size(range = c(2, 10), name = "# size of total participation") +
  labs(title = "The number of people a place attracts to participate events there")

# Per-category view: colour encodes the event category, point size encodes
# the total RSVPs.
nymap +
  geom_point(
    mapping = aes(
      x = venue_lon, y = venue_lat,
      size = part_size, color = category_name
    ),
    data = bin_ncate,
    alpha = 0.2
  ) +
  scale_size(range = c(2, 9), name = "# size of participation") +
  labs(title = "Size of total participation of events in different categories")

# Disabled variant of the plot above, sized by average instead of total:
#nymap + geom_point(data = bin_ncate, aes(x = venue_lon, y = venue_lat,color = category_name, size=avg_part_size),alpha=0.2)+ scale_size(name = '# size of participation', range = c(2,9))+labs(title = "Size of average participation of events in different categories each time")
# Downtown Chicago: terrain basemap centred on a fixed lon/lat at zoom 14.
dt <- get_map(
  location = c(lon = -87.625177, lat = 41.876858),
  zoom = 14,
  maptype = "terrain"
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.876858,-87.625177&zoom=14&size=640x640&scale=2&maptype=terrain&language=en-EN&key=<REDACTED>
dt_map <- ggmap(dt)

# Every individual event as a jittered point, coloured by category.
# NOTE(review): the "Removed ... rows" warnings below are presumably events
# outside this map tile (plus rows with NA coordinates) -- confirm.
dt_map +
  geom_jitter(
    data = chi_mapping,
    aes(x = venue_lon, y = venue_lat, color = category_name),
    alpha = 0.5,
    size = 3.0
  )
## Warning: Removed 1237 rows containing missing values (geom_point).

# Aggregated view: one point per category/venue, sized by total RSVPs.
# The size legend previously read "# count of different Events", but the
# size aesthetic is part_size (summed RSVPs), not an event count; the label
# now matches the one used for the same mapping on the city-wide map.
dt_map +
  geom_point(
    data = bin_cate,
    aes(
      x = venue_lon, y = venue_lat,
      color = category_name, size = part_size
    ),
    alpha = 0.5
  ) +
  scale_size(name = "# size of total participation", range = c(2, 10))
## Warning: Removed 85 rows containing missing values (geom_point).

# Wicker Park: terrain basemap centred on a fixed lon/lat at zoom 15.
wp <- get_map(
  location = c(lon = -87.679610, lat = 41.909001),
  zoom = 15,
  maptype = "terrain"
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=41.909001,-87.67961&zoom=15&size=640x640&scale=2&maptype=terrain&language=en-EN&key=<REDACTED>
wpmap <- ggmap(wp)

# Every individual event as a jittered point, coloured by category.
# NOTE(review): the "Removed ... rows" warnings below are presumably events
# outside this map tile (plus rows with NA coordinates) -- confirm.
wpmap +
  geom_jitter(
    data = chi_mapping,
    aes(x = venue_lon, y = venue_lat, color = category_name),
    alpha = 0.5,
    size = 5.0
  )
## Warning: Removed 2098 rows containing missing values (geom_point).

# Aggregated view: one point per category/venue, sized by total RSVPs.
# The size legend previously read "# count of different Events", but the
# size aesthetic is part_size (summed RSVPs), not an event count; the label
# now matches the one used for the same mapping on the city-wide map.
wpmap +
  geom_point(
    data = bin_cate,
    aes(
      x = venue_lon, y = venue_lat,
      color = category_name, size = part_size
    ),
    alpha = 0.4
  ) +
  scale_size(name = "# size of total participation", range = c(2, 15))
## Warning: Removed 155 rows containing missing values (geom_point).